home *** CD-ROM | disk | FTP | other *** search
- {*********************************************************}
- {* HashRecM *}
- {* Copyright (c) Julian M Bucknall 1997 *}
- {* All rights reserved. *}
- {*********************************************************}
- {* Record manager using hash table as index *}
- {*********************************************************}
-
- {Note: this unit is released as freeware. In other words, you are free
- to use this unit in your own applications, however I retain all
- copyright to the code. JMB}
-
- unit HashRecM;
-
- interface
-
- uses
- {$IFDEF Windows}
- WinProcs, WinTypes,
- {$ELSE}
- Windows,
- {$ENDIF}
- SysUtils;
-
- type
- {$IFDEF Windows}
- TMemSize = word;
- {$ELSE}
- TMemSize = integer;
- {$ENDIF}
-
- type
- ThrmHashFunction = function (const S : string) : longint;
- {-Function type for a hash function for the record manager}
- ThrmGetKey = function (const aRecord) : string;
- {-Function type to retrieve the key string from a record}
-
- type
- ThrmHashRecordManager = class
- private
- hrmArray : pointer;
- hrmArraySize : TMemSize;
- hrmBucket : PByteArray;
- hrmBucketNo : longint;
- hrmBucketSize : TMemSize;
- hrmCount : longint;
- hrmDataHandle : THandle;
- hrmFileName : string;
- hrmGetKeyFunc : ThrmGetKey;
- hrmHashFunc : ThrmHashFunction;
- hrmIndexHandle : THandle;
- hrmMustFlush : boolean;
- hrmRecLen : integer;
- hrmRecsPerBucket : integer;
- hrmTableSize : integer;
- protected
- function hrmAddBucket : longint;
- function hrmFindPrim(const aKey : string;
- var aBucketNo : longint;
- var aBucketInx : integer) : boolean;
- procedure hrmGrowTable;
- function hrmHash(const aKey : string) : integer;
- function hrmGetKey(const aRecord) : string;
- procedure hrmReadBucket(aBucketNo : integer);
- procedure hrmUpdateBucket(aBucketNo : integer);
- procedure hrmUpdateIndex;
- public
- constructor Create(const aFileName : string;
- aRecLen : integer;
- aTableSize : integer;
- aHashFunc : ThrmHashFunction;
- aGetKeyFunc: ThrmGetKey);
- {-create a hashed record manager. aFileName is the root name
- of the data (.HTD) and index (.HTI) files. aRecLen is the
- record length. aTableSize is the number of elements in the
- hash table and is only required if a new table is being
- created, otherwise it is read from the index file. aHashFunc
- is the routine to hash a string. aGetKeyFunc is the routine
- that returns the key for a given record.}
- destructor Destroy; override;
- {-destroy the record manager. All memory is released, all
- files are closed}
-
- procedure Delete(const aKey : string);
- {-delete the record defined by aKey; an exception is raised
- if the record is not found}
- function Find(const aKey : string; var aRecord) : boolean;
- {-find the record defined by aKey; return true and the
- associated record if the string is found, otherwise false}
- procedure Insert(const aRecord);
- {-insert a new record; an exception is raised if the key
- generated from the record is already present}
-
- property Count : longint read hrmCount;
- {-current number of records in the file}
- property MustFlush : boolean
- read hrmMustFlush write hrmMustFlush;
- {-true if writes to the files must be flushed immediately}
-
- property RecLength : integer read hrmRecLen;
- {-record length}
- property RecsPerBucket : integer read hrmRecsPerBucket;
- {-number of records per bucket}
- property TableSize : integer read hrmTableSize;
- {-number of elements in the hash table}
- end;
-
- implementation
-
- type
- THashElementState = (hesEmpty, hesDeleted, hesInUse);
-
- THashElement = packed record
- heBucketNo : longint;
- end;
-
- PHashElementArray = ^THashElementArray;
- THashElementArray =
- array [0..pred(MaxInt div sizeof(THashElement))] of THashElement;
-
- {===Helper routines==================================================}
- procedure RaiseException(const S : string);
- begin
- raise Exception.Create(S);
- end;
- {--------}
- function GetClosestPrime(N : integer) : integer;
- {$I Primes.inc}
- const
- Forever = true;
- var
- L, R, M : integer;
- RootN : integer;
- IsPrime : boolean;
- DivisorIndex : integer;
- begin
- {treat 2 as a special case}
- if (N = 2) then begin
- Result := N;
- Exit;
- end;
- {make the result equal to N, and if it's even, the next odd number}
- if Odd(N) then
- Result := N
- else
- Result := succ(N);
- {if the result is within our prime number table, use binary search
- to find the equal or next highest prime number}
- if (Result <= MaxPrime) then begin
- L := 0;
- R := pred(PrimeCount);
- while (L <= R) do begin
- M := (L + R) div 2;
- if (Result = Primes[M]) then
- Exit
- else if (Result < Primes[M]) then
- R := pred(M)
- else
- L := succ(M);
- end;
- Result := Primes[L];
- Exit;
- end;
- {the result is outside our prime number table range, use the
- standard method for testing primality (do any of the primes up to
- the root of the number divide it exactly?) and continue
- incrementing the result by 2 until it is prime}
- if (Result <= (MaxPrime * MaxPrime)) then begin
- while Forever do begin
- RootN := round(Sqrt(Result));
- DivisorIndex := 1; {ignore the prime number 2}
- IsPrime := true;
- while (DivisorIndex < PrimeCount) and (RootN > Primes[DivisorIndex]) do begin
- if ((Result div Primes[DivisorIndex]) * Primes[DivisorIndex] = Result) then begin
- IsPrime := false;
- Break;
- end;
- inc(DivisorIndex);
- end;
- if IsPrime then
- Exit;
- inc(Result, 2);
- end;
- end;
- end;
- {====================================================================}
-
-
- {===File routines====================================================}
- procedure HRMClose(aHandle : THandle);
- begin
- if (aHandle <> 0) then
- FileClose(aHandle);
- end;
- {--------}
- procedure HRMDeleteFile(const aFileName : string);
- begin
- if not SysUtils.DeleteFile(aFileName) then
- RaiseException(Format('HashRecM.HRMDeleteFile: delete of %s failed',
- [aFileName]));
- end;
- {--------}
- procedure HRMFlush(aHandle : THandle);
- {$IFDEF Windows}
- var
- DosError : word;
- begin
- asm
- mov ah, $68
- mov bx, aHandle
- call DOS3Call
- jc @@Error
- xor ax, ax
- @@Error:
- mov DosError, ax
- end;
- if (DosError <> 0) then
- RaiseException('HashRecM.HRMFlush: flush failed')
- end;
- {$ELSE}
- begin
- if not FlushFileBuffers(aHandle) then
- RaiseException('HashRecM.HRMFlush: flush failed')
- end;
- {$ENDIF}
- {--------}
- function HRMOpen(const aFileName : string; aIsNew : boolean) : THandle;
- begin
- if aIsNew then begin
- Result := FileCreate(aFileName);
- if (Result = -1) then
- RaiseException('HashRecM.HRMOpen: could not create file');
- FileClose(Result);
- end;
- Result := FileOpen(aFileName, fmOpenReadWrite or fmShareExclusive);
- if (Result = -1) then
- RaiseException('HashRecM.HRMOpen: could not open file');
- end;
- {--------}
- procedure HRMRead(aHandle : THandle;
- aOffset : longint;
- aBufSize: longint;
- var aBuffer);
- var
- SeekOffset : longint;
- BytesRead : longint;
- begin
- SeekOffset := FileSeek(aHandle, aOffset, 0);
- if (SeekOffset = -1) then
- RaiseException('HashRecM.HRMRead: seek failed');
- BytesRead := FileRead(aHandle, aBuffer, aBufSize);
- if (BytesRead <> aBufSize) then
- RaiseException('HashRecM.HRMRead: read failed');
- end;
- {--------}
- procedure HRMRenameFile(const aOldFileName, aNewFileName : string);
- begin
- if not SysUtils.RenameFile(aOldFileName, aNewFileName) then
- RaiseException(Format('HashRecM.HRMRenameFile: rename of %s to %s failed',
- [aOldFileName, aNewFileName]));
- end;
- {--------}
- function HRMSeek(aHandle : THandle;
- aOffset : longint;
- aOrigin : longint) : longint;
- begin
- Result := FileSeek(aHandle, aOffset, aOrigin);
- if (Result = -1) then
- RaiseException('HashRecM.HRMSeek: seek failed');
- end;
- {--------}
- procedure HRMWrite(aHandle : THandle;
- aOffset : longint;
- aBufSize: longint;
- const aBuffer);
- var
- SeekOffset : longint;
- BytesWrit : longint;
- begin
- SeekOffset := FileSeek(aHandle, aOffset, 0);
- if (SeekOffset = -1) then
- RaiseException('HashRecM.HRMWrite: seek failed');
- BytesWrit := FileWrite(aHandle, aBuffer, aBufSize);
- if (BytesWrit <> aBufSize) then
- RaiseException('HashRecM.HRMWrite: write failed');
- end;
- {====================================================================}
-
-
- {===ThrmHashRecordManager===============================================}
- constructor ThrmHashRecordManager.Create(const aFileName : string;
- aRecLen : integer;
- aTableSize : integer;
- aHashFunc : ThrmHashFunction;
- aGetKeyFunc: ThrmGetKey);
- var
- FName : string;
- IndexFileSize : longint;
- IsNew : boolean;
- begin
- inherited Create;
-
- {open up the data and index files}
- hrmFileName := aFileName;
- FName := SysUtils.ChangeFileExt(aFileName, '.HTD');
- IsNew := not FileExists(FName);
- hrmDataHandle := HRMOpen(FName, IsNew);
- FName := SysUtils.ChangeFileExt(aFileName, '.HTI');
- hrmIndexHandle := HRMOpen(FName, IsNew);
-
- {using the index handle, work out the hash table size}
- IndexFileSize := HRMSeek(hrmIndexHandle, 0, 2);
- if (IndexFileSize = 0) then
- hrmTableSize := GetClosestPrime(aTableSize)
- else
- hrmTableSize := (IndexFileSize - sizeof(hrmCount)) div sizeof(THashElement);
-
- {create the hash table}
- hrmArraySize := hrmTableSize * sizeof(THashElement);
- GetMem(hrmArray, hrmArraySize);
-
- {read the old hash table or save the new empty hash table}
- if (IndexFileSize = 0) then begin
- FillChar(hrmArray^, hrmArraySize, $FF);
- hrmUpdateIndex;
- end
- else begin
- HRMRead(hrmIndexHandle, 0, sizeof(hrmCount), hrmCount);
- HRMRead(hrmIndexHandle, sizeof(hrmCount), hrmArraySize, hrmArray^);
- end;
-
- {calculate the bucket size, create a bucket buffer}
- hrmBucketSize := 4*1024;
- GetMem(hrmBucket, hrmBucketSize);
-
- {remember the functions, the record length, the record count per
- bucket}
- hrmHashFunc := aHashFunc;
- hrmGetKeyFunc := aGetKeyFunc;
- hrmRecLen := aRecLen;
- hrmRecsPerBucket := hrmBucketSize div succ(aRecLen);
- end;
- {--------}
- destructor ThrmHashRecordManager.Destroy;
- begin
- {force the index to be updated}
- if (hrmIndexHandle <> 0) then
- hrmUpdateIndex;
- {close the files}
- HRMClose(hrmDataHandle);
- HRMClose(hrmIndexHandle);
- {free memory}
- if (hrmArray <> nil) then
- FreeMem(hrmArray, hrmArraySize);
- if (hrmBucket <> nil) then
- FreeMem(hrmBucket, hrmBucketSize);
- {destroy ancestor}
- inherited Destroy;
- end;
- {--------}
- procedure ThrmHashRecordManager.Delete(const aKey : string);
- var
- BNo : longint;
- BInx : integer;
- begin
- if not hrmFindPrim(aKey, BNo, BInx) then
- RaiseException('ThrmHashRecordManager.Delete: key not found');
- hrmBucket^[BInx * succ(hrmRecLen)] := ord(hesDeleted);
- hrmUpdateBucket(BNo);
- dec(hrmCount);
- end;
- {--------}
- function ThrmHashRecordManager.Find(const aKey : string; var aRecord) : boolean;
- var
- BNo : longint;
- BInx : integer;
- begin
- if hrmFindPrim(aKey, BNo, BInx) then begin
- Result := true;
- Move(hrmBucket^[succ(BInx * succ(hrmRecLen))], aRecord, hrmRecLen);
- end
- else begin
- Result := false;
- end;
- end;
- {--------}
- function ThrmHashRecordManager.hrmAddBucket : longint;
- var
- EOF : longint;
- begin
- FillChar(hrmBucket^, hrmBucketSize, 0);
- EOF := HRMSeek(hrmDataHandle, 0, 2);
- Result := EOF div hrmBucketSize;
- HRMWrite(hrmDataHandle, EOF, hrmBucketSize, hrmBucket^);
- if MustFlush then
- HRMFlush(hrmDataHandle);
- hrmBucketNo := Result;
- end;
- {--------}
- function ThrmHashRecordManager.hrmFindPrim(const aKey : string;
- var aBucketNo : longint;
- var aBucketInx : integer) : boolean;
- var
- FirstDelBucket : longint;
- FirstDelBucketInx : integer;
- KeyHash : integer;
- FirstKeyHash : integer;
- RecOffset : longint;
- RecNo : integer;
- begin
- {Note: this routine either returns True to say the key was found or
- False if it wasn't. If True, aBucketNo is the bucket number
- in the data file and aBucketInx is the number of the record
- in the bucket (and the global hrmBucket field has the bucket
- in it). If False then aBucketNo/aBucketInx is the address of
- the first deleted record (and hrmBucket has the bucket in it)
- or aBucketNo is -1 to say that a new bucket is required. If
- aBucketInx is -1 then the hash table is completely full;
- otherwise aBucketInx is the element number in the hash table
- where the new bucket number is to be stored.}
-
- {assume we'll fail}
- Result := false;
- {we may need to make note of the first deleted bucket we find, so
- set the variable to some impossible value so that we know whether
- we found one yet}
- FirstDelBucket := -1;
- FirstDelBucketInx := -1;
- {calculate the hash for the string, make a note of it so we can find
- out when (if) we wrap around the table completely}
- KeyHash := hrmHash(aKey);
- FirstKeyHash := KeyHash;
- {do forever - we'll be exiting out of the loop when needed}
- while true do begin
- {with the current element...}
- with PHashElementArray(hrmArray)^[KeyHash] do begin
- {if the bucket number is -1 then the element is empty; we must
- stop the linear probe and return either this index or the first
- deleted one we encountered}
- if (heBucketNo = -1) then begin
- if (FirstDelBucket <> -1) then begin
- aBucketNo := FirstDelBucket;
- aBucketInx := FirstDelBucketInx;
- end
- else begin
- aBucketNo := -1;
- aBucketInx := KeyHash;
- end;
- Exit;
- end;
- {otherwise the element is used; retrieve the bucket from disk}
- hrmReadBucket(heBucketNo);
- {for each record in this bucket, check its state}
- for RecNo := 0 to pred(hrmRecsPerBucket) do begin
- RecOffset := RecNo * succ(hrmRecLen);
- case THashElementState(hrmBucket^[RecOffset]) of
- hesEmpty : begin
- {the state is 'empty', we must stop the
- linear probe and return either this bucket
- and index or the first deleted one we found}
- if (FirstDelBucket <> -1) then begin
- aBucketNo := FirstDelBucket;
- aBucketInx := FirstDelBucketInx;
- end
- else begin
- aBucketNo := heBucketNo;
- aBucketInx := RecNo;
- end;
- Exit;
- end;
- hesDeleted : begin
- {the state is 'deleted', we must make a note
- of this index if it's the first one we found
- and continue the linear probe}
- if (FirstDelBucket = -1) then begin
- FirstDelBucket := heBucketNo;
- FirstDelBucketInx := RecNo;
- end;
- end;
- hesInUse : begin
- {the state is 'in use', we check to see if
- it's our string, if it is, exit returning
- true and the index}
- if (hrmGetKey(hrmBucket^[succ(RecOffset)]) = aKey) then begin
- aBucketNo := heBucketNo;
- aBucketInx := RecNo;
- Result := true;
- Exit;
- end;
- end;
- else
- {bad news}
- RaiseException('ThrmHashRecordManager.hrmFindPrim: invalid element state')
- end;{case}
- end;
- end;
- {we didn't find the key or an empty slot this time around, so
- increment the index (taking care of the wraparound) and exit if
- we've got back to the start again}
- inc(KeyHash);
- if (KeyHash = hrmTableSize) then
- KeyHash := 0;
- if (KeyHash = FirstKeyHash) then begin
- if (FirstDelBucket <> -1) then begin
- aBucketNo := FirstDelBucket;
- aBucketInx := FirstDelBucketInx;
- end
- else begin
- aBucketNo := -1;
- aBucketInx := -1;
- end;
- Exit;
- end;
- end;{forever loop}
- end;
- {--------}
- procedure ThrmHashRecordManager.hrmGrowTable;
- var
- UndoLevel : integer;
- Bucket, Inx : integer;
- NewTableSize : integer;
- OldTableSize : integer;
- OldArraySize : TMemSize;
- NewArraySize : TMemSize;
- OldCount : longint;
- RecOffset : integer;
- BucketCount : longint;
- OldDataHandle: THandle;
- OldBucket : PByteArray;
- NewArray : PHashElementArray;
- OldArray : PHashElementArray;
- FName, TempFName : string;
- begin
- {allocate a bucket for our use}
- GetMem(OldBucket, hrmBucketSize);
- try
- {save the old array, element count, etc}
- OldArray := PHashElementArray(hrmArray);
- OldArraySize := hrmArraySize;
- OldTableSize := hrmTableSize;
- OldCount := hrmCount;
- try
- {track the amount of work we do, in case something goes wrong
- and we have to undo - we shall aim to leave the record manager
- in the same state is was when we started if an error occurs}
- UndoLevel := 0;
- {allocate a new array roughly twice as large as before}
- NewTableSize := GetClosestPrime(succ(hrmTableSize * 2));
- NewArraySize := NewTableSize * sizeof(THashElement);
- GetMem(NewArray, NewArraySize);
- FillChar(NewArray^, NewArraySize, $FF);
- {set the new data}
- hrmArray := NewArray;
- hrmArraySize := NewArraySize;
- hrmTableSize := NewTableSize;
- hrmCount := 0;
- UndoLevel := 1;
- {calculate the number of buckets in the current data file}
- BucketCount := HRMSeek(hrmDataHandle, 0, 2) div hrmBucketSize;
- {close the current data file, rename it, open it again}
- FName := SysUtils.ChangeFileExt(hrmFileName, '.HTD');
- TempFName := SysUtils.ChangeFileExt(hrmFileName, '.SAV');
- HRMClose(hrmDataHandle);
- UndoLevel := 2;
- HRMRenameFile(FName, TempFName);
- UndoLevel := 3;
- OldDataHandle := HRMOpen(TempFName, false);
- UndoLevel := 4;
- {create a new data file}
- hrmDataHandle := HRMOpen(FName, true);
- UndoLevel := 5;
- {read through the old file and transfer over the records}
- for Bucket := 0 to pred(BucketCount) do begin
- HRMRead(OldDataHandle, (Bucket * hrmBucketSize), hrmBucketSize, OldBucket^);
- for Inx := 0 to pred(hrmRecsPerBucket) do begin
- RecOffset := Inx * succ(hrmRecLen);
- if (THashElementState(OldBucket^[RecOffset]) = hesInUse) then begin
- Insert(OldBucket^[succ(RecOffset)]);
- end;
- end;
- end;
- {close the old handle and delete the file}
- try
- HRMClose(OldDataHandle);
- HRMDeleteFile(TempFName);
- except
- end;
- {free the old hash array}
- FreeMem(OldArray, OldTableSize * sizeof(THashElement));
- {note that we don't need to ensure the index file is updated,
- since the Inserts above will have done so}
- except
- if (UndoLevel >= 5) then begin
- HRMClose(hrmDataHandle);
- HRMDeleteFile(FName);
- end;
- if (UndoLevel >= 4) then
- HRMClose(OldDataHandle);
- if (UndoLevel >= 3) then
- HRMRenameFile(TempFName, FName);
- if (UndoLevel >= 2) then
- hrmDataHandle := HRMOpen(FName, false);
- if (UndoLevel >= 1) then
- FreeMem(NewArray, NewTableSize * sizeof(THashElement));
- hrmArray := OldArray;
- hrmArraySize := OldArraySize;
- hrmTableSize := OldTableSize;
- hrmCount := OldCount;
- raise;
- end;
- finally
- FreeMem(OldBucket, hrmBucketSize);
- end;
- end;
- {--------}
- function ThrmHashRecordManager.hrmHash(const aKey : string) : integer;
- begin
- if not Assigned(hrmHashFunc) then
- RaiseException('ThrmHashRecordManager.hrmHash: no hash function defined');
- Result := hrmHashFunc(aKey) mod hrmTableSize
- end;
- {--------}
- function ThrmHashRecordManager.hrmGetKey(const aRecord) : string;
- begin
- if not Assigned(hrmGetKeyFunc) then
- RaiseException('ThrmHashRecordManager.hrmGetKey: no get key function defined');
- Result := hrmGetKeyFunc(aRecord);
- end;
- {--------}
- procedure ThrmHashRecordManager.hrmReadBucket(aBucketNo : integer);
- begin
- HRMRead(hrmDataHandle, (aBucketNo * hrmBucketSize), hrmBucketSize, hrmBucket^);
- hrmBucketNo := aBucketNo;
- end;
- {--------}
- procedure ThrmHashRecordManager.hrmUpdateBucket(aBucketNo : integer);
- begin
- HRMWrite(hrmDataHandle,
- (aBucketNo * hrmBucketSize),
- hrmBucketSize,
- hrmBucket^);
- if MustFlush then
- HRMFlush(hrmDataHandle);
- end;
- {--------}
- procedure ThrmHashRecordManager.hrmUpdateIndex;
- begin
- HRMWrite(hrmIndexHandle, 0, sizeof(hrmCount), hrmCount);
- HRMWrite(hrmIndexHandle, sizeof(hrmCount), hrmArraySize, hrmArray^);
- if MustFlush then
- HRMFlush(hrmIndexHandle);
- end;
- {--------}
- procedure ThrmHashRecordManager.Insert(const aRecord);
- var
- BNo : longint;
- BInx : integer;
- RecOffset : longint;
- begin
- if hrmFindPrim(hrmGetKey(aRecord), BNo, BInx) then
- RaiseException('ThrmHashRecordManager.Insert: duplicate key');
- if (BNo = -1) then begin
- if (BInx = -1) then
- RaiseException('ThrmHashRecordManager.Insert: table is full');
- BNo := hrmAddBucket;
- PHashElementArray(hrmArray)^[BInx].heBucketNo := BNo;
- hrmUpdateIndex;
- BInx := 0;
- end;
- if (BNo <> hrmBucketNo) then
- hrmReadBucket(BNo);
- RecOffset := BInx * succ(hrmRecLen);
- hrmBucket^[RecOffset] := ord(hesInUse);
- Move(aRecord, hrmBucket^[succ(RecOffset)], hrmRecLen);
- hrmUpdateBucket(BNo);
- inc(hrmCount);
- if ((hrmCount * 3) > (longint(hrmTableSize) * 2 * hrmRecsPerBucket)) then
- hrmGrowTable;
- end;
- {====================================================================}
-
- end.
-